{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cartpole DQN"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Deep Q-Learning Network with Keras and OpenAI Gym, based on [Keon Kim's code](https://github.com/keon/deep-q-learning/blob/master/dqn.py)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/the-deep-learners/deep-learning-illustrated/blob/master/notebooks/cartpole_dqn.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Import dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "import gym\n",
    "import numpy as np\n",
    "from collections import deque\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "from keras.optimizers import Adam\n",
    "import os # for creating directories"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Set hyperparameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('CartPole-v0') # initialise the CartPole-v0 Gym environment that the agent interacts with"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "state_size = env.observation_space.shape[0] # length of one observation vector; becomes the Q-network's input size\n",
    "state_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "action_size = env.action_space.n # number of discrete actions; becomes the Q-network's output size\n",
    "action_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 32 # minibatch size; presumably the value passed to agent.train() -- confirm against the training loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_episodes = 1000 # number of games (episodes) we want the agent to play"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "output_dir = 'model_output/cartpole/' # directory created in the next cell; presumably where model weights get saved -- confirm against the training loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.makedirs(output_dir, exist_ok=True) # create the output directory (and any parents); no error if it already exists"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Define agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNAgent:\n",
    "    def __init__(self, state_size, action_size):\n",
    "        self.state_size = state_size\n",
    "        self.action_size = action_size\n",
    "        self.memory = deque(maxlen=2000) # double-ended queue; acts like list, but elements can be added/removed from either end\n",
    "        self.gamma = 0.95 # decay or discount rate: enables agent to take into account future actions in addition to the immediate ones, but discounted at this rate\n",
    "        self.epsilon = 1.0 # exploration rate: how much to act randomly; more initially than later due to epsilon decay\n",
    "        self.epsilon_decay = 0.995 # decrease number of random explorations as the agent's performance (hopefully) improves over time\n",
    "        self.epsilon_min = 0.01 # minimum amount of random exploration permitted\n",
    "        self.learning_rate = 0.001 # rate at which NN adjusts models parameters via SGD to reduce cost \n",
    "        self.model = self._build_model() # private method \n",
    "    \n",
    "    def _build_model(self):\n",
    "        # neural net to approximate Q-value function:\n",
    "        model = Sequential()\n",
    "        model.add(Dense(32, activation='relu', \n",
    "                        input_dim=self.state_size)) # 1st hidden layer; states as input\n",
    "        model.add(Dense(32, activation='relu')) # 2nd hidden layer\n",
    "        model.add(Dense(self.action_size, activation='linear')) # 2 actions, so 2 output neurons: 0 and 1 (L/R)\n",
    "        model.compile(loss='mse',\n",
    "                      optimizer=Adam(lr=self.learning_rate))\n",
    "        return model\n",
    "    \n",
    "    def remember(self, state, action, reward, next_state, done):\n",
    "        self.memory.append((state, action, \n",
    "                            reward, next_state, done)) # list of previous experiences, enabling re-training later\n",
    "\n",
    "    def train(self, batch_size): # method that trains NN with experiences sampled from memory\n",
    "        minibatch = random.sample(self.memory, batch_size) # sample a minibatch from memory\n",
    "        for state, action, reward, next_state, done in minibatch: # extract data for each minibatch sample\n",
    "            target = reward # if done (boolean whether game ended or not, i.e., whether final state or not), then target = reward\n",
    "            if not done: # if not done, then predict future discounted reward\n",
    "                target = (reward + \n",
    "                          self.gamma * # (target) = reward + (discount rate gamma) * \n",
    "                          np.amax(self.model.predict(next_state)[0])) # (maximum target Q based on future action a')\n",
    "            target_f = self.model.predict(state) # approximately map current state to future discounted reward\n",
    "            target_f[0][action] = target\n",
    "            self.model.fit(state, target_f, epochs=1, verbose=0) # single epoch of training with x=state, y=target_f; fit decreases loss btwn target_f and y_hat\n",
    "        if self.epsilon > self.epsilon_min:\n",
    "            self.epsilon *= self.epsilon_decay\n",
    "\n",
    "    def act(self, state):\n",
    "        if np.random.rand() <= self.epsilon: # if acting randomly, take random action\n",
    "            return random.randrange(self.action_size)\n",
    "        act_values = self.model.predict(state) # if not acting randomly, predict reward value based on current state\n",
    "        return np.argmax(act_values[0]) # pick the action that will give the highest reward (i.e., go left or right?)\n",
    "    \n",
    "    def save(self, name):\n",
    "        self.model.save_weights(name)\n",
    "\n",
    "    def load(self, name):\n",
    "        self.model.load_weights(name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Interact with environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "agent = DQNAgent(state_size, action_size) # initialise the agent, sized to the environment's observation and action spaces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode: 0/999, score: 19, e: 1.0\n",
      "episode: 1/999, score: 14, e: 1.0\n",
      "episode: 2/999, score: 37, e: 0.99\n",
      "episode: 3/999, score: 11, e: 0.99\n",
      "episode: 4/999, score: 35, e: 0.99\n",
      "episode: 5/999, score: 41, e: 0.98\n",
      "episode: 6/999, score: 18, e: 0.98\n",
      "episode: 7/999, score: 10, e: 0.97\n",
      "episode: 8/999, score: 9, e: 0.97\n",
      "episode: 9/999, score: 24, e: 0.96\n",
      "episode: 10/999, score: 18, e: 0.96\n",
      "episode: 11/999, score: 16, e: 0.95\n",
      "episode: 12/999, score: 16, e: 0.95\n",
      "episode: 13/999, score: 12, e: 0.94\n",
      "episode: 14/999, score: 27, e: 0.94\n",
      "episode: 15/999, score: 22, e: 0.93\n",
      "episode: 16/999, score: 16, e: 0.93\n",
      "episode: 17/999, score: 30, e: 0.92\n",
      "episode: 18/999, score: 16, e: 0.92\n",
      "episode: 19/999, score: 14, e: 0.91\n",
      "episode: 20/999, score: 10, e: 0.91\n",
      "episode: 21/999, score: 19, e: 0.9\n",
      "episode: 22/999, score: 32, e: 0.9\n",
      "episode: 23/999, score: 17, e: 0.9\n",
      "episode: 24/999, score: 34, e: 0.89\n",
      "episode: 25/999, score: 9, e: 0.89\n",
      "episode: 26/999, score: 21, e: 0.88\n",
      "episode: 27/999, score: 10, e: 0.88\n",
      "episode: 28/999, score: 9, e: 0.87\n",
      "episode: 29/999, score: 13, e: 0.87\n",
      "episode: 30/999, score: 16, e: 0.86\n",
      "episode: 31/999, score: 13, e: 0.86\n",
      "episode: 32/999, score: 19, e: 0.86\n",
      "episode: 33/999, score: 24, e: 0.85\n",
      "episode: 34/999, score: 16, e: 0.85\n",
      "episode: 35/999, score: 8, e: 0.84\n",
      "episode: 36/999, score: 34, e: 0.84\n",
      "episode: 37/999, score: 10, e: 0.83\n",
      "episode: 38/999, score: 13, e: 0.83\n",
      "episode: 39/999, score: 29, e: 0.83\n",
      "episode: 40/999, score: 19, e: 0.82\n",
      "episode: 41/999, score: 10, e: 0.82\n",
      "episode: 42/999, score: 11, e: 0.81\n",
      "episode: 43/999, score: 28, e: 0.81\n",
      "episode: 44/999, score: 33, e: 0.81\n",
      "episode: 45/999, score: 13, e: 0.8\n",
      "episode: 46/999, score: 22, e: 0.8\n",
      "episode: 47/999, score: 11, e: 0.79\n",
      "episode: 48/999, score: 14, e: 0.79\n",
      "episode: 49/999, score: 13, e: 0.79\n",
      "episode: 50/999, score: 9, e: 0.78\n",
      "episode: 51/999, score: 15, e: 0.78\n",
      "episode: 52/999, score: 50, e: 0.77\n",
      "episode: 53/999, score: 19, e: 0.77\n",
      "episode: 54/999, score: 12, e: 0.77\n",
      "episode: 55/999, score: 14, e: 0.76\n",
      "episode: 56/999, score: 22, e: 0.76\n",
      "episode: 57/999, score: 16, e: 0.76\n",
      "episode: 58/999, score: 14, e: 0.75\n",
      "episode: 59/999, score: 12, e: 0.75\n",
      "episode: 60/999, score: 25, e: 0.74\n",
      "episode: 61/999, score: 11, e: 0.74\n",
      "episode: 62/999, score: 17, e: 0.74\n",
      "episode: 63/999, score: 12, e: 0.73\n",
      "episode: 64/999, score: 13, e: 0.73\n",
      "episode: 65/999, score: 25, e: 0.73\n",
      "episode: 66/999, score: 17, e: 0.72\n",
      "episode: 67/999, score: 11, e: 0.72\n",
      "episode: 68/999, score: 10, e: 0.71\n",
      "episode: 69/999, score: 9, e: 0.71\n",
      "episode: 70/999, score: 16, e: 0.71\n",
      "episode: 71/999, score: 8, e: 0.7\n",
      "episode: 72/999, score: 14, e: 0.7\n",
      "episode: 73/999, score: 13, e: 0.7\n",
      "episode: 74/999, score: 29, e: 0.69\n",
      "episode: 75/999, score: 21, e: 0.69\n",
      "episode: 76/999, score: 43, e: 0.69\n",
      "episode: 77/999, score: 19, e: 0.68\n",
      "episode: 78/999, score: 14, e: 0.68\n",
      "episode: 79/999, score: 18, e: 0.68\n",
      "episode: 80/999, score: 12, e: 0.67\n",
      "episode: 81/999, score: 14, e: 0.67\n",
      "episode: 82/999, score: 10, e: 0.67\n",
      "episode: 83/999, score: 11, e: 0.66\n",
      "episode: 84/999, score: 30, e: 0.66\n",
      "episode: 85/999, score: 43, e: 0.66\n",
      "episode: 86/999, score: 17, e: 0.65\n",
      "episode: 87/999, score: 9, e: 0.65\n",
      "episode: 88/999, score: 13, e: 0.65\n",
      "episode: 89/999, score: 18, e: 0.64\n",
      "episode: 90/999, score: 14, e: 0.64\n",
      "episode: 91/999, score: 19, e: 0.64\n",
      "episode: 92/999, score: 17, e: 0.63\n",
      "episode: 93/999, score: 17, e: 0.63\n",
      "episode: 94/999, score: 15, e: 0.63\n",
      "episode: 95/999, score: 17, e: 0.62\n",
      "episode: 96/999, score: 12, e: 0.62\n",
      "episode: 97/999, score: 10, e: 0.62\n",
      "episode: 98/999, score: 9, e: 0.61\n",
      "episode: 99/999, score: 9, e: 0.61\n",
      "episode: 100/999, score: 11, e: 0.61\n",
      "episode: 101/999, score: 12, e: 0.61\n",
      "episode: 102/999, score: 15, e: 0.6\n",
      "episode: 103/999, score: 23, e: 0.6\n",
      "episode: 104/999, score: 13, e: 0.6\n",
      "episode: 105/999, score: 19, e: 0.59\n",
      "episode: 106/999, score: 18, e: 0.59\n",
      "episode: 107/999, score: 15, e: 0.59\n",
      "episode: 108/999, score: 12, e: 0.58\n",
      "episode: 109/999, score: 22, e: 0.58\n",
      "episode: 110/999, score: 15, e: 0.58\n",
      "episode: 111/999, score: 14, e: 0.58\n",
      "episode: 112/999, score: 22, e: 0.57\n",
      "episode: 113/999, score: 29, e: 0.57\n",
      "episode: 114/999, score: 33, e: 0.57\n",
      "episode: 115/999, score: 32, e: 0.56\n",
      "episode: 116/999, score: 61, e: 0.56\n",
      "episode: 117/999, score: 20, e: 0.56\n",
      "episode: 118/999, score: 16, e: 0.56\n",
      "episode: 119/999, score: 12, e: 0.55\n",
      "episode: 120/999, score: 13, e: 0.55\n",
      "episode: 121/999, score: 12, e: 0.55\n",
      "episode: 122/999, score: 66, e: 0.55\n",
      "episode: 123/999, score: 27, e: 0.54\n",
      "episode: 124/999, score: 33, e: 0.54\n",
      "episode: 125/999, score: 59, e: 0.54\n",
      "episode: 126/999, score: 41, e: 0.53\n",
      "episode: 127/999, score: 47, e: 0.53\n",
      "episode: 128/999, score: 24, e: 0.53\n",
      "episode: 129/999, score: 36, e: 0.53\n",
      "episode: 130/999, score: 37, e: 0.52\n",
      "episode: 131/999, score: 43, e: 0.52\n",
      "episode: 132/999, score: 32, e: 0.52\n",
      "episode: 133/999, score: 21, e: 0.52\n",
      "episode: 134/999, score: 36, e: 0.51\n",
      "episode: 135/999, score: 34, e: 0.51\n",
      "episode: 136/999, score: 29, e: 0.51\n",
      "episode: 137/999, score: 63, e: 0.51\n",
      "episode: 138/999, score: 40, e: 0.5\n",
      "episode: 139/999, score: 48, e: 0.5\n",
      "episode: 140/999, score: 30, e: 0.5\n",
      "episode: 141/999, score: 41, e: 0.5\n",
      "episode: 142/999, score: 31, e: 0.49\n",
      "episode: 143/999, score: 38, e: 0.49\n",
      "episode: 144/999, score: 90, e: 0.49\n",
      "episode: 145/999, score: 12, e: 0.49\n",
      "episode: 146/999, score: 69, e: 0.48\n",
      "episode: 147/999, score: 38, e: 0.48\n",
      "episode: 148/999, score: 25, e: 0.48\n",
      "episode: 149/999, score: 37, e: 0.48\n",
      "episode: 150/999, score: 53, e: 0.47\n",
      "episode: 151/999, score: 37, e: 0.47\n",
      "episode: 152/999, score: 56, e: 0.47\n",
      "episode: 153/999, score: 44, e: 0.47\n",
      "episode: 154/999, score: 29, e: 0.46\n",
      "episode: 155/999, score: 31, e: 0.46\n",
      "episode: 156/999, score: 68, e: 0.46\n",
      "episode: 157/999, score: 38, e: 0.46\n",
      "episode: 158/999, score: 35, e: 0.46\n",
      "episode: 159/999, score: 24, e: 0.45\n",
      "episode: 160/999, score: 46, e: 0.45\n",
      "episode: 161/999, score: 55, e: 0.45\n",
      "episode: 162/999, score: 83, e: 0.45\n",
      "episode: 163/999, score: 76, e: 0.44\n",
      "episode: 164/999, score: 47, e: 0.44\n",
      "episode: 165/999, score: 83, e: 0.44\n",
      "episode: 166/999, score: 85, e: 0.44\n",
      "episode: 167/999, score: 38, e: 0.44\n",
      "episode: 168/999, score: 32, e: 0.43\n",
      "episode: 169/999, score: 35, e: 0.43\n",
      "episode: 170/999, score: 74, e: 0.43\n",
      "episode: 171/999, score: 90, e: 0.43\n",
      "episode: 172/999, score: 34, e: 0.42\n",
      "episode: 173/999, score: 21, e: 0.42\n",
      "episode: 174/999, score: 44, e: 0.42\n",
      "episode: 175/999, score: 32, e: 0.42\n",
      "episode: 176/999, score: 129, e: 0.42\n",
      "episode: 177/999, score: 34, e: 0.41\n",
      "episode: 178/999, score: 31, e: 0.41\n",
      "episode: 179/999, score: 19, e: 0.41\n",
      "episode: 180/999, score: 28, e: 0.41\n",
      "episode: 181/999, score: 154, e: 0.41\n",
      "episode: 182/999, score: 53, e: 0.4\n",
      "episode: 183/999, score: 45, e: 0.4\n",
      "episode: 184/999, score: 59, e: 0.4\n",
      "episode: 185/999, score: 49, e: 0.4\n",
      "episode: 186/999, score: 82, e: 0.4\n",
      "episode: 187/999, score: 81, e: 0.39\n",
      "episode: 188/999, score: 63, e: 0.39\n",
      "episode: 189/999, score: 38, e: 0.39\n",
      "episode: 190/999, score: 44, e: 0.39\n",
      "episode: 191/999, score: 25, e: 0.39\n",
      "episode: 192/999, score: 52, e: 0.38\n",
      "episode: 193/999, score: 130, e: 0.38\n",
      "episode: 194/999, score: 31, e: 0.38\n",
      "episode: 195/999, score: 70, e: 0.38\n",
      "episode: 196/999, score: 36, e: 0.38\n",
      "episode: 197/999, score: 67, e: 0.37\n",
      "episode: 198/999, score: 62, e: 0.37\n",
      "episode: 199/999, score: 45, e: 0.37\n",
      "episode: 200/999, score: 81, e: 0.37\n",
      "episode: 201/999, score: 63, e: 0.37\n",
      "episode: 202/999, score: 36, e: 0.37\n",
      "episode: 203/999, score: 44, e: 0.36\n",
      "episode: 204/999, score: 30, e: 0.36\n",
      "episode: 205/999, score: 44, e: 0.36\n",
      "episode: 206/999, score: 40, e: 0.36\n",
      "episode: 207/999, score: 69, e: 0.36\n",
      "episode: 208/999, score: 62, e: 0.35\n",
      "episode: 209/999, score: 38, e: 0.35\n",
      "episode: 210/999, score: 43, e: 0.35\n",
      "episode: 211/999, score: 56, e: 0.35\n",
      "episode: 212/999, score: 37, e: 0.35\n",
      "episode: 213/999, score: 63, e: 0.35\n",
      "episode: 214/999, score: 98, e: 0.34\n",
      "episode: 215/999, score: 44, e: 0.34\n",
      "episode: 216/999, score: 28, e: 0.34\n",
      "episode: 217/999, score: 28, e: 0.34\n",
      "episode: 218/999, score: 73, e: 0.34\n",
      "episode: 219/999, score: 13, e: 0.34\n",
      "episode: 220/999, score: 119, e: 0.33\n",
      "episode: 221/999, score: 119, e: 0.33\n",
      "episode: 222/999, score: 56, e: 0.33\n",
      "episode: 223/999, score: 84, e: 0.33\n",
      "episode: 224/999, score: 36, e: 0.33\n",
      "episode: 225/999, score: 44, e: 0.33\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode: 226/999, score: 61, e: 0.32\n",
      "episode: 227/999, score: 56, e: 0.32\n",
      "episode: 228/999, score: 36, e: 0.32\n",
      "episode: 229/999, score: 63, e: 0.32\n",
      "episode: 230/999, score: 31, e: 0.32\n",
      "episode: 231/999, score: 29, e: 0.32\n",
      "episode: 232/999, score: 54, e: 0.31\n",
      "episode: 233/999, score: 94, e: 0.31\n",
      "episode: 234/999, score: 57, e: 0.31\n",
      "episode: 235/999, score: 58, e: 0.31\n",
      "episode: 236/999, score: 54, e: 0.31\n",
      "episode: 237/999, score: 45, e: 0.31\n",
      "episode: 238/999, score: 38, e: 0.3\n",
      "episode: 239/999, score: 52, e: 0.3\n",
      "episode: 240/999, score: 42, e: 0.3\n",
      "episode: 241/999, score: 30, e: 0.3\n",
      "episode: 242/999, score: 99, e: 0.3\n",
      "episode: 243/999, score: 93, e: 0.3\n",
      "episode: 244/999, score: 48, e: 0.3\n",
      "episode: 245/999, score: 63, e: 0.29\n",
      "episode: 246/999, score: 16, e: 0.29\n",
      "episode: 247/999, score: 42, e: 0.29\n",
      "episode: 248/999, score: 72, e: 0.29\n",
      "episode: 249/999, score: 69, e: 0.29\n",
      "episode: 250/999, score: 25, e: 0.29\n",
      "episode: 251/999, score: 56, e: 0.29\n",
      "episode: 252/999, score: 40, e: 0.28\n",
      "episode: 253/999, score: 119, e: 0.28\n",
      "episode: 254/999, score: 138, e: 0.28\n",
      "episode: 255/999, score: 52, e: 0.28\n",
      "episode: 256/999, score: 72, e: 0.28\n",
      "episode: 257/999, score: 33, e: 0.28\n",
      "episode: 258/999, score: 60, e: 0.28\n",
      "episode: 259/999, score: 47, e: 0.27\n",
      "episode: 260/999, score: 34, e: 0.27\n",
      "episode: 261/999, score: 28, e: 0.27\n",
      "episode: 262/999, score: 55, e: 0.27\n",
      "episode: 263/999, score: 54, e: 0.27\n",
      "episode: 264/999, score: 152, e: 0.27\n",
      "episode: 265/999, score: 62, e: 0.27\n",
      "episode: 266/999, score: 95, e: 0.26\n",
      "episode: 267/999, score: 51, e: 0.26\n",
      "episode: 268/999, score: 52, e: 0.26\n",
      "episode: 269/999, score: 48, e: 0.26\n",
      "episode: 270/999, score: 43, e: 0.26\n",
      "episode: 271/999, score: 84, e: 0.26\n",
      "episode: 272/999, score: 135, e: 0.26\n",
      "episode: 273/999, score: 54, e: 0.26\n",
      "episode: 274/999, score: 124, e: 0.25\n",
      "episode: 275/999, score: 99, e: 0.25\n",
      "episode: 276/999, score: 110, e: 0.25\n",
      "episode: 277/999, score: 72, e: 0.25\n",
      "episode: 278/999, score: 68, e: 0.25\n",
      "episode: 279/999, score: 71, e: 0.25\n",
      "episode: 280/999, score: 82, e: 0.25\n",
      "episode: 281/999, score: 57, e: 0.25\n",
      "episode: 282/999, score: 132, e: 0.24\n",
      "episode: 283/999, score: 76, e: 0.24\n",
      "episode: 284/999, score: 38, e: 0.24\n",
      "episode: 285/999, score: 47, e: 0.24\n",
      "episode: 286/999, score: 93, e: 0.24\n",
      "episode: 287/999, score: 54, e: 0.24\n",
      "episode: 288/999, score: 65, e: 0.24\n",
      "episode: 289/999, score: 74, e: 0.24\n",
      "episode: 290/999, score: 44, e: 0.23\n",
      "episode: 291/999, score: 44, e: 0.23\n",
      "episode: 292/999, score: 36, e: 0.23\n",
      "episode: 293/999, score: 57, e: 0.23\n",
      "episode: 294/999, score: 60, e: 0.23\n",
      "episode: 295/999, score: 64, e: 0.23\n",
      "episode: 296/999, score: 29, e: 0.23\n",
      "episode: 297/999, score: 86, e: 0.23\n",
      "episode: 298/999, score: 135, e: 0.23\n",
      "episode: 299/999, score: 122, e: 0.22\n",
      "episode: 300/999, score: 58, e: 0.22\n",
      "episode: 301/999, score: 39, e: 0.22\n",
      "episode: 302/999, score: 32, e: 0.22\n",
      "episode: 303/999, score: 32, e: 0.22\n",
      "episode: 304/999, score: 27, e: 0.22\n",
      "episode: 305/999, score: 39, e: 0.22\n",
      "episode: 306/999, score: 39, e: 0.22\n",
      "episode: 307/999, score: 26, e: 0.22\n",
      "episode: 308/999, score: 54, e: 0.21\n",
      "episode: 309/999, score: 39, e: 0.21\n",
      "episode: 310/999, score: 70, e: 0.21\n",
      "episode: 311/999, score: 54, e: 0.21\n",
      "episode: 312/999, score: 51, e: 0.21\n",
      "episode: 313/999, score: 52, e: 0.21\n",
      "episode: 314/999, score: 41, e: 0.21\n",
      "episode: 315/999, score: 38, e: 0.21\n",
      "episode: 316/999, score: 43, e: 0.21\n",
      "episode: 317/999, score: 49, e: 0.21\n",
      "episode: 318/999, score: 42, e: 0.2\n",
      "episode: 319/999, score: 67, e: 0.2\n",
      "episode: 320/999, score: 92, e: 0.2\n",
      "episode: 321/999, score: 62, e: 0.2\n",
      "episode: 322/999, score: 66, e: 0.2\n",
      "episode: 323/999, score: 69, e: 0.2\n",
      "episode: 324/999, score: 70, e: 0.2\n",
      "episode: 325/999, score: 117, e: 0.2\n",
      "episode: 326/999, score: 58, e: 0.2\n",
      "episode: 327/999, score: 152, e: 0.2\n",
      "episode: 328/999, score: 63, e: 0.19\n",
      "episode: 329/999, score: 67, e: 0.19\n",
      "episode: 330/999, score: 107, e: 0.19\n",
      "episode: 331/999, score: 88, e: 0.19\n",
      "episode: 332/999, score: 130, e: 0.19\n",
      "episode: 333/999, score: 129, e: 0.19\n",
      "episode: 334/999, score: 110, e: 0.19\n",
      "episode: 335/999, score: 104, e: 0.19\n",
      "episode: 336/999, score: 101, e: 0.19\n",
      "episode: 337/999, score: 123, e: 0.19\n",
      "episode: 338/999, score: 199, e: 0.18\n",
      "episode: 339/999, score: 15, e: 0.18\n",
      "episode: 340/999, score: 27, e: 0.18\n",
      "episode: 341/999, score: 40, e: 0.18\n",
      "episode: 342/999, score: 66, e: 0.18\n",
      "episode: 343/999, score: 70, e: 0.18\n",
      "episode: 344/999, score: 107, e: 0.18\n",
      "episode: 345/999, score: 173, e: 0.18\n",
      "episode: 346/999, score: 79, e: 0.18\n",
      "episode: 347/999, score: 50, e: 0.18\n",
      "episode: 348/999, score: 49, e: 0.18\n",
      "episode: 349/999, score: 95, e: 0.17\n",
      "episode: 350/999, score: 76, e: 0.17\n",
      "episode: 351/999, score: 68, e: 0.17\n",
      "episode: 352/999, score: 103, e: 0.17\n",
      "episode: 353/999, score: 95, e: 0.17\n",
      "episode: 354/999, score: 160, e: 0.17\n",
      "episode: 355/999, score: 102, e: 0.17\n",
      "episode: 356/999, score: 101, e: 0.17\n",
      "episode: 357/999, score: 97, e: 0.17\n",
      "episode: 358/999, score: 137, e: 0.17\n",
      "episode: 359/999, score: 82, e: 0.17\n",
      "episode: 360/999, score: 150, e: 0.17\n",
      "episode: 361/999, score: 179, e: 0.16\n",
      "episode: 362/999, score: 199, e: 0.16\n",
      "episode: 363/999, score: 199, e: 0.16\n",
      "episode: 364/999, score: 199, e: 0.16\n",
      "episode: 365/999, score: 199, e: 0.16\n",
      "episode: 366/999, score: 168, e: 0.16\n",
      "episode: 367/999, score: 130, e: 0.16\n",
      "episode: 368/999, score: 192, e: 0.16\n",
      "episode: 369/999, score: 102, e: 0.16\n",
      "episode: 370/999, score: 152, e: 0.16\n",
      "episode: 371/999, score: 181, e: 0.16\n",
      "episode: 372/999, score: 171, e: 0.16\n",
      "episode: 373/999, score: 181, e: 0.15\n",
      "episode: 374/999, score: 124, e: 0.15\n",
      "episode: 375/999, score: 199, e: 0.15\n",
      "episode: 376/999, score: 199, e: 0.15\n",
      "episode: 377/999, score: 77, e: 0.15\n",
      "episode: 378/999, score: 117, e: 0.15\n",
      "episode: 379/999, score: 199, e: 0.15\n",
      "episode: 380/999, score: 99, e: 0.15\n",
      "episode: 381/999, score: 115, e: 0.15\n",
      "episode: 382/999, score: 99, e: 0.15\n",
      "episode: 383/999, score: 114, e: 0.15\n",
      "episode: 384/999, score: 86, e: 0.15\n",
      "episode: 385/999, score: 70, e: 0.15\n",
      "episode: 386/999, score: 80, e: 0.15\n",
      "episode: 387/999, score: 73, e: 0.14\n",
      "episode: 388/999, score: 82, e: 0.14\n",
      "episode: 389/999, score: 80, e: 0.14\n",
      "episode: 390/999, score: 91, e: 0.14\n",
      "episode: 391/999, score: 90, e: 0.14\n",
      "episode: 392/999, score: 78, e: 0.14\n",
      "episode: 393/999, score: 199, e: 0.14\n",
      "episode: 394/999, score: 174, e: 0.14\n",
      "episode: 395/999, score: 177, e: 0.14\n",
      "episode: 396/999, score: 199, e: 0.14\n",
      "episode: 397/999, score: 196, e: 0.14\n",
      "episode: 398/999, score: 93, e: 0.14\n",
      "episode: 399/999, score: 199, e: 0.14\n",
      "episode: 400/999, score: 196, e: 0.14\n",
      "episode: 401/999, score: 119, e: 0.13\n",
      "episode: 402/999, score: 199, e: 0.13\n",
      "episode: 403/999, score: 98, e: 0.13\n",
      "episode: 404/999, score: 199, e: 0.13\n",
      "episode: 405/999, score: 199, e: 0.13\n",
      "episode: 406/999, score: 199, e: 0.13\n",
      "episode: 407/999, score: 119, e: 0.13\n",
      "episode: 408/999, score: 139, e: 0.13\n",
      "episode: 409/999, score: 192, e: 0.13\n",
      "episode: 410/999, score: 126, e: 0.13\n",
      "episode: 411/999, score: 199, e: 0.13\n",
      "episode: 412/999, score: 137, e: 0.13\n",
      "episode: 413/999, score: 199, e: 0.13\n",
      "episode: 414/999, score: 199, e: 0.13\n",
      "episode: 415/999, score: 183, e: 0.13\n",
      "episode: 416/999, score: 130, e: 0.12\n",
      "episode: 417/999, score: 19, e: 0.12\n",
      "episode: 418/999, score: 60, e: 0.12\n",
      "episode: 419/999, score: 150, e: 0.12\n",
      "episode: 420/999, score: 199, e: 0.12\n",
      "episode: 421/999, score: 199, e: 0.12\n",
      "episode: 422/999, score: 199, e: 0.12\n",
      "episode: 423/999, score: 199, e: 0.12\n",
      "episode: 424/999, score: 199, e: 0.12\n",
      "episode: 425/999, score: 21, e: 0.12\n",
      "episode: 426/999, score: 65, e: 0.12\n",
      "episode: 427/999, score: 199, e: 0.12\n",
      "episode: 428/999, score: 118, e: 0.12\n",
      "episode: 429/999, score: 33, e: 0.12\n",
      "episode: 430/999, score: 33, e: 0.12\n",
      "episode: 431/999, score: 199, e: 0.12\n",
      "episode: 432/999, score: 148, e: 0.12\n",
      "episode: 433/999, score: 76, e: 0.11\n",
      "episode: 434/999, score: 96, e: 0.11\n",
      "episode: 435/999, score: 85, e: 0.11\n",
      "episode: 436/999, score: 91, e: 0.11\n",
      "episode: 437/999, score: 70, e: 0.11\n",
      "episode: 438/999, score: 121, e: 0.11\n",
      "episode: 439/999, score: 43, e: 0.11\n",
      "episode: 440/999, score: 199, e: 0.11\n",
      "episode: 441/999, score: 117, e: 0.11\n",
      "episode: 442/999, score: 135, e: 0.11\n",
      "episode: 443/999, score: 98, e: 0.11\n",
      "episode: 444/999, score: 177, e: 0.11\n",
      "episode: 445/999, score: 199, e: 0.11\n",
      "episode: 446/999, score: 154, e: 0.11\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode: 447/999, score: 199, e: 0.11\n",
      "episode: 448/999, score: 95, e: 0.11\n",
      "episode: 449/999, score: 127, e: 0.11\n",
      "episode: 450/999, score: 166, e: 0.11\n",
      "episode: 451/999, score: 164, e: 0.1\n",
      "episode: 452/999, score: 99, e: 0.1\n",
      "episode: 453/999, score: 68, e: 0.1\n",
      "episode: 454/999, score: 101, e: 0.1\n",
      "episode: 455/999, score: 199, e: 0.1\n",
      "episode: 456/999, score: 199, e: 0.1\n",
      "episode: 457/999, score: 199, e: 0.1\n",
      "episode: 458/999, score: 132, e: 0.1\n",
      "episode: 459/999, score: 108, e: 0.1\n",
      "episode: 460/999, score: 126, e: 0.1\n",
      "episode: 461/999, score: 177, e: 0.1\n",
      "episode: 462/999, score: 199, e: 0.099\n",
      "episode: 463/999, score: 45, e: 0.099\n",
      "episode: 464/999, score: 41, e: 0.098\n",
      "episode: 465/999, score: 108, e: 0.098\n",
      "episode: 466/999, score: 126, e: 0.097\n",
      "episode: 467/999, score: 20, e: 0.097\n",
      "episode: 468/999, score: 19, e: 0.096\n",
      "episode: 469/999, score: 42, e: 0.096\n",
      "episode: 470/999, score: 90, e: 0.095\n",
      "episode: 471/999, score: 72, e: 0.095\n",
      "episode: 472/999, score: 69, e: 0.094\n",
      "episode: 473/999, score: 122, e: 0.094\n",
      "episode: 474/999, score: 132, e: 0.093\n",
      "episode: 475/999, score: 98, e: 0.093\n",
      "episode: 476/999, score: 75, e: 0.092\n",
      "episode: 477/999, score: 100, e: 0.092\n",
      "episode: 478/999, score: 131, e: 0.092\n",
      "episode: 479/999, score: 199, e: 0.091\n",
      "episode: 480/999, score: 149, e: 0.091\n",
      "episode: 481/999, score: 93, e: 0.09\n",
      "episode: 482/999, score: 134, e: 0.09\n",
      "episode: 483/999, score: 125, e: 0.089\n",
      "episode: 484/999, score: 191, e: 0.089\n",
      "episode: 485/999, score: 132, e: 0.088\n",
      "episode: 486/999, score: 113, e: 0.088\n",
      "episode: 487/999, score: 87, e: 0.088\n",
      "episode: 488/999, score: 23, e: 0.087\n",
      "episode: 489/999, score: 35, e: 0.087\n",
      "episode: 490/999, score: 102, e: 0.086\n",
      "episode: 491/999, score: 199, e: 0.086\n",
      "episode: 492/999, score: 43, e: 0.085\n",
      "episode: 493/999, score: 199, e: 0.085\n",
      "episode: 494/999, score: 199, e: 0.084\n",
      "episode: 495/999, score: 199, e: 0.084\n",
      "episode: 496/999, score: 28, e: 0.084\n",
      "episode: 497/999, score: 20, e: 0.083\n",
      "episode: 498/999, score: 40, e: 0.083\n",
      "episode: 499/999, score: 163, e: 0.082\n",
      "episode: 500/999, score: 105, e: 0.082\n",
      "episode: 501/999, score: 184, e: 0.082\n",
      "episode: 502/999, score: 145, e: 0.081\n",
      "episode: 503/999, score: 63, e: 0.081\n",
      "episode: 504/999, score: 36, e: 0.08\n",
      "episode: 505/999, score: 87, e: 0.08\n",
      "episode: 506/999, score: 60, e: 0.08\n",
      "episode: 507/999, score: 45, e: 0.079\n",
      "episode: 508/999, score: 187, e: 0.079\n",
      "episode: 509/999, score: 109, e: 0.078\n",
      "episode: 510/999, score: 176, e: 0.078\n",
      "episode: 511/999, score: 59, e: 0.078\n",
      "episode: 512/999, score: 69, e: 0.077\n",
      "episode: 513/999, score: 75, e: 0.077\n",
      "episode: 514/999, score: 72, e: 0.076\n",
      "episode: 515/999, score: 79, e: 0.076\n",
      "episode: 516/999, score: 199, e: 0.076\n",
      "episode: 517/999, score: 27, e: 0.075\n",
      "episode: 518/999, score: 95, e: 0.075\n",
      "episode: 519/999, score: 199, e: 0.075\n",
      "episode: 520/999, score: 199, e: 0.074\n",
      "episode: 521/999, score: 160, e: 0.074\n",
      "episode: 522/999, score: 199, e: 0.073\n",
      "episode: 523/999, score: 199, e: 0.073\n",
      "episode: 524/999, score: 157, e: 0.073\n",
      "episode: 525/999, score: 163, e: 0.072\n",
      "episode: 526/999, score: 199, e: 0.072\n",
      "episode: 527/999, score: 199, e: 0.072\n",
      "episode: 528/999, score: 28, e: 0.071\n",
      "episode: 529/999, score: 83, e: 0.071\n",
      "episode: 530/999, score: 199, e: 0.071\n",
      "episode: 531/999, score: 199, e: 0.07\n",
      "episode: 532/999, score: 199, e: 0.07\n",
      "episode: 533/999, score: 93, e: 0.069\n",
      "episode: 534/999, score: 115, e: 0.069\n",
      "episode: 535/999, score: 150, e: 0.069\n",
      "episode: 536/999, score: 199, e: 0.068\n",
      "episode: 537/999, score: 164, e: 0.068\n",
      "episode: 538/999, score: 199, e: 0.068\n",
      "episode: 539/999, score: 199, e: 0.067\n",
      "episode: 540/999, score: 152, e: 0.067\n",
      "episode: 541/999, score: 79, e: 0.067\n",
      "episode: 542/999, score: 199, e: 0.066\n",
      "episode: 543/999, score: 114, e: 0.066\n",
      "episode: 544/999, score: 199, e: 0.066\n",
      "episode: 545/999, score: 116, e: 0.065\n",
      "episode: 546/999, score: 177, e: 0.065\n",
      "episode: 547/999, score: 48, e: 0.065\n",
      "episode: 548/999, score: 119, e: 0.064\n",
      "episode: 549/999, score: 100, e: 0.064\n",
      "episode: 550/999, score: 199, e: 0.064\n",
      "episode: 551/999, score: 158, e: 0.063\n",
      "episode: 552/999, score: 199, e: 0.063\n",
      "episode: 553/999, score: 199, e: 0.063\n",
      "episode: 554/999, score: 199, e: 0.063\n",
      "episode: 555/999, score: 138, e: 0.062\n",
      "episode: 556/999, score: 199, e: 0.062\n",
      "episode: 557/999, score: 25, e: 0.062\n",
      "episode: 558/999, score: 46, e: 0.061\n",
      "episode: 559/999, score: 199, e: 0.061\n",
      "episode: 560/999, score: 199, e: 0.061\n",
      "episode: 561/999, score: 199, e: 0.06\n",
      "episode: 562/999, score: 199, e: 0.06\n",
      "episode: 563/999, score: 199, e: 0.06\n",
      "episode: 564/999, score: 165, e: 0.059\n",
      "episode: 565/999, score: 199, e: 0.059\n",
      "episode: 566/999, score: 104, e: 0.059\n",
      "episode: 567/999, score: 25, e: 0.059\n",
      "episode: 568/999, score: 38, e: 0.058\n",
      "episode: 569/999, score: 27, e: 0.058\n",
      "episode: 570/999, score: 74, e: 0.058\n",
      "episode: 571/999, score: 199, e: 0.057\n",
      "episode: 572/999, score: 74, e: 0.057\n",
      "episode: 573/999, score: 199, e: 0.057\n",
      "episode: 574/999, score: 199, e: 0.057\n",
      "episode: 575/999, score: 199, e: 0.056\n",
      "episode: 576/999, score: 199, e: 0.056\n",
      "episode: 577/999, score: 199, e: 0.056\n",
      "episode: 578/999, score: 199, e: 0.055\n",
      "episode: 579/999, score: 22, e: 0.055\n",
      "episode: 580/999, score: 39, e: 0.055\n",
      "episode: 581/999, score: 168, e: 0.055\n",
      "episode: 582/999, score: 30, e: 0.054\n",
      "episode: 583/999, score: 69, e: 0.054\n",
      "episode: 584/999, score: 73, e: 0.054\n",
      "episode: 585/999, score: 68, e: 0.054\n",
      "episode: 586/999, score: 157, e: 0.053\n",
      "episode: 587/999, score: 145, e: 0.053\n",
      "episode: 588/999, score: 199, e: 0.053\n",
      "episode: 589/999, score: 127, e: 0.052\n",
      "episode: 590/999, score: 76, e: 0.052\n",
      "episode: 591/999, score: 45, e: 0.052\n",
      "episode: 592/999, score: 104, e: 0.052\n",
      "episode: 593/999, score: 99, e: 0.051\n",
      "episode: 594/999, score: 22, e: 0.051\n",
      "episode: 595/999, score: 26, e: 0.051\n",
      "episode: 596/999, score: 199, e: 0.051\n",
      "episode: 597/999, score: 191, e: 0.05\n",
      "episode: 598/999, score: 199, e: 0.05\n",
      "episode: 599/999, score: 85, e: 0.05\n",
      "episode: 600/999, score: 72, e: 0.05\n",
      "episode: 601/999, score: 77, e: 0.049\n",
      "episode: 602/999, score: 96, e: 0.049\n",
      "episode: 603/999, score: 76, e: 0.049\n",
      "episode: 604/999, score: 81, e: 0.049\n",
      "episode: 605/999, score: 79, e: 0.048\n",
      "episode: 606/999, score: 54, e: 0.048\n",
      "episode: 607/999, score: 116, e: 0.048\n",
      "episode: 608/999, score: 199, e: 0.048\n",
      "episode: 609/999, score: 176, e: 0.047\n",
      "episode: 610/999, score: 199, e: 0.047\n",
      "episode: 611/999, score: 199, e: 0.047\n",
      "episode: 612/999, score: 49, e: 0.047\n",
      "episode: 613/999, score: 39, e: 0.047\n",
      "episode: 614/999, score: 45, e: 0.046\n",
      "episode: 615/999, score: 34, e: 0.046\n",
      "episode: 616/999, score: 43, e: 0.046\n",
      "episode: 617/999, score: 42, e: 0.046\n",
      "episode: 618/999, score: 44, e: 0.045\n",
      "episode: 619/999, score: 39, e: 0.045\n",
      "episode: 620/999, score: 48, e: 0.045\n",
      "episode: 621/999, score: 58, e: 0.045\n",
      "episode: 622/999, score: 70, e: 0.044\n",
      "episode: 623/999, score: 60, e: 0.044\n",
      "episode: 624/999, score: 136, e: 0.044\n",
      "episode: 625/999, score: 113, e: 0.044\n",
      "episode: 626/999, score: 89, e: 0.044\n",
      "episode: 627/999, score: 19, e: 0.043\n",
      "episode: 628/999, score: 151, e: 0.043\n",
      "episode: 629/999, score: 199, e: 0.043\n",
      "episode: 630/999, score: 199, e: 0.043\n",
      "episode: 631/999, score: 105, e: 0.043\n",
      "episode: 632/999, score: 103, e: 0.042\n",
      "episode: 633/999, score: 46, e: 0.042\n",
      "episode: 634/999, score: 79, e: 0.042\n",
      "episode: 635/999, score: 101, e: 0.042\n",
      "episode: 636/999, score: 77, e: 0.041\n",
      "episode: 637/999, score: 70, e: 0.041\n",
      "episode: 638/999, score: 35, e: 0.041\n",
      "episode: 639/999, score: 91, e: 0.041\n",
      "episode: 640/999, score: 31, e: 0.041\n",
      "episode: 641/999, score: 34, e: 0.04\n",
      "episode: 642/999, score: 24, e: 0.04\n",
      "episode: 643/999, score: 70, e: 0.04\n",
      "episode: 644/999, score: 32, e: 0.04\n",
      "episode: 645/999, score: 36, e: 0.04\n",
      "episode: 646/999, score: 199, e: 0.039\n",
      "episode: 647/999, score: 33, e: 0.039\n",
      "episode: 648/999, score: 19, e: 0.039\n",
      "episode: 649/999, score: 37, e: 0.039\n",
      "episode: 650/999, score: 199, e: 0.039\n",
      "episode: 651/999, score: 35, e: 0.038\n",
      "episode: 652/999, score: 32, e: 0.038\n",
      "episode: 653/999, score: 199, e: 0.038\n",
      "episode: 654/999, score: 35, e: 0.038\n",
      "episode: 655/999, score: 199, e: 0.038\n",
      "episode: 656/999, score: 47, e: 0.038\n",
      "episode: 657/999, score: 53, e: 0.037\n",
      "episode: 658/999, score: 27, e: 0.037\n",
      "episode: 659/999, score: 133, e: 0.037\n",
      "episode: 660/999, score: 116, e: 0.037\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode: 661/999, score: 199, e: 0.037\n",
      "episode: 662/999, score: 199, e: 0.036\n",
      "episode: 663/999, score: 94, e: 0.036\n",
      "episode: 664/999, score: 156, e: 0.036\n",
      "episode: 665/999, score: 199, e: 0.036\n",
      "episode: 666/999, score: 159, e: 0.036\n",
      "episode: 667/999, score: 199, e: 0.035\n",
      "episode: 668/999, score: 199, e: 0.035\n",
      "episode: 669/999, score: 36, e: 0.035\n",
      "episode: 670/999, score: 24, e: 0.035\n",
      "episode: 671/999, score: 43, e: 0.035\n",
      "episode: 672/999, score: 199, e: 0.035\n",
      "episode: 673/999, score: 199, e: 0.034\n",
      "episode: 674/999, score: 45, e: 0.034\n",
      "episode: 675/999, score: 68, e: 0.034\n",
      "episode: 676/999, score: 199, e: 0.034\n",
      "episode: 677/999, score: 37, e: 0.034\n",
      "episode: 678/999, score: 28, e: 0.034\n",
      "episode: 679/999, score: 69, e: 0.033\n",
      "episode: 680/999, score: 199, e: 0.033\n",
      "episode: 681/999, score: 199, e: 0.033\n",
      "episode: 682/999, score: 199, e: 0.033\n",
      "episode: 683/999, score: 199, e: 0.033\n",
      "episode: 684/999, score: 199, e: 0.033\n",
      "episode: 685/999, score: 199, e: 0.032\n",
      "episode: 686/999, score: 199, e: 0.032\n",
      "episode: 687/999, score: 199, e: 0.032\n",
      "episode: 688/999, score: 33, e: 0.032\n",
      "episode: 689/999, score: 87, e: 0.032\n",
      "episode: 690/999, score: 53, e: 0.032\n",
      "episode: 691/999, score: 199, e: 0.031\n",
      "episode: 692/999, score: 25, e: 0.031\n",
      "episode: 693/999, score: 199, e: 0.031\n",
      "episode: 694/999, score: 199, e: 0.031\n",
      "episode: 695/999, score: 199, e: 0.031\n",
      "episode: 696/999, score: 199, e: 0.031\n",
      "episode: 697/999, score: 152, e: 0.031\n",
      "episode: 698/999, score: 199, e: 0.03\n",
      "episode: 699/999, score: 199, e: 0.03\n",
      "episode: 700/999, score: 18, e: 0.03\n",
      "episode: 701/999, score: 199, e: 0.03\n",
      "episode: 702/999, score: 199, e: 0.03\n",
      "episode: 703/999, score: 199, e: 0.03\n",
      "episode: 704/999, score: 100, e: 0.029\n",
      "episode: 705/999, score: 122, e: 0.029\n",
      "episode: 706/999, score: 199, e: 0.029\n",
      "episode: 707/999, score: 199, e: 0.029\n",
      "episode: 708/999, score: 199, e: 0.029\n",
      "episode: 709/999, score: 199, e: 0.029\n",
      "episode: 710/999, score: 199, e: 0.029\n",
      "episode: 711/999, score: 199, e: 0.028\n",
      "episode: 712/999, score: 98, e: 0.028\n",
      "episode: 713/999, score: 199, e: 0.028\n",
      "episode: 714/999, score: 199, e: 0.028\n",
      "episode: 715/999, score: 106, e: 0.028\n",
      "episode: 716/999, score: 199, e: 0.028\n",
      "episode: 717/999, score: 181, e: 0.028\n",
      "episode: 718/999, score: 199, e: 0.027\n",
      "episode: 719/999, score: 199, e: 0.027\n",
      "episode: 720/999, score: 128, e: 0.027\n",
      "episode: 721/999, score: 194, e: 0.027\n",
      "episode: 722/999, score: 199, e: 0.027\n",
      "episode: 723/999, score: 121, e: 0.027\n",
      "episode: 724/999, score: 125, e: 0.027\n",
      "episode: 725/999, score: 107, e: 0.027\n",
      "episode: 726/999, score: 199, e: 0.026\n",
      "episode: 727/999, score: 113, e: 0.026\n",
      "episode: 728/999, score: 100, e: 0.026\n",
      "episode: 729/999, score: 19, e: 0.026\n",
      "episode: 730/999, score: 18, e: 0.026\n",
      "episode: 731/999, score: 14, e: 0.026\n",
      "episode: 732/999, score: 24, e: 0.026\n",
      "episode: 733/999, score: 188, e: 0.025\n",
      "episode: 734/999, score: 199, e: 0.025\n",
      "episode: 735/999, score: 199, e: 0.025\n",
      "episode: 736/999, score: 199, e: 0.025\n",
      "episode: 737/999, score: 199, e: 0.025\n",
      "episode: 738/999, score: 199, e: 0.025\n",
      "episode: 739/999, score: 199, e: 0.025\n",
      "episode: 740/999, score: 199, e: 0.025\n",
      "episode: 741/999, score: 100, e: 0.024\n",
      "episode: 742/999, score: 105, e: 0.024\n",
      "episode: 743/999, score: 199, e: 0.024\n",
      "episode: 744/999, score: 199, e: 0.024\n",
      "episode: 745/999, score: 199, e: 0.024\n",
      "episode: 746/999, score: 199, e: 0.024\n",
      "episode: 747/999, score: 199, e: 0.024\n",
      "episode: 748/999, score: 199, e: 0.024\n",
      "episode: 749/999, score: 199, e: 0.024\n",
      "episode: 750/999, score: 199, e: 0.023\n",
      "episode: 751/999, score: 173, e: 0.023\n",
      "episode: 752/999, score: 137, e: 0.023\n",
      "episode: 753/999, score: 199, e: 0.023\n",
      "episode: 754/999, score: 199, e: 0.023\n",
      "episode: 755/999, score: 199, e: 0.023\n",
      "episode: 756/999, score: 199, e: 0.023\n",
      "episode: 757/999, score: 199, e: 0.023\n",
      "episode: 758/999, score: 199, e: 0.022\n",
      "episode: 759/999, score: 199, e: 0.022\n",
      "episode: 760/999, score: 199, e: 0.022\n",
      "episode: 761/999, score: 120, e: 0.022\n",
      "episode: 762/999, score: 199, e: 0.022\n",
      "episode: 763/999, score: 127, e: 0.022\n",
      "episode: 764/999, score: 199, e: 0.022\n",
      "episode: 765/999, score: 199, e: 0.022\n",
      "episode: 766/999, score: 199, e: 0.022\n",
      "episode: 767/999, score: 170, e: 0.022\n",
      "episode: 768/999, score: 159, e: 0.021\n",
      "episode: 769/999, score: 199, e: 0.021\n",
      "episode: 770/999, score: 199, e: 0.021\n",
      "episode: 771/999, score: 199, e: 0.021\n",
      "episode: 772/999, score: 199, e: 0.021\n",
      "episode: 773/999, score: 199, e: 0.021\n",
      "episode: 774/999, score: 199, e: 0.021\n",
      "episode: 775/999, score: 199, e: 0.021\n",
      "episode: 776/999, score: 199, e: 0.021\n",
      "episode: 777/999, score: 199, e: 0.02\n",
      "episode: 778/999, score: 199, e: 0.02\n",
      "episode: 779/999, score: 199, e: 0.02\n",
      "episode: 780/999, score: 199, e: 0.02\n",
      "episode: 781/999, score: 199, e: 0.02\n",
      "episode: 782/999, score: 199, e: 0.02\n",
      "episode: 783/999, score: 199, e: 0.02\n",
      "episode: 784/999, score: 199, e: 0.02\n",
      "episode: 785/999, score: 150, e: 0.02\n",
      "episode: 786/999, score: 182, e: 0.02\n",
      "episode: 787/999, score: 199, e: 0.019\n",
      "episode: 788/999, score: 188, e: 0.019\n",
      "episode: 789/999, score: 199, e: 0.019\n",
      "episode: 790/999, score: 199, e: 0.019\n",
      "episode: 791/999, score: 199, e: 0.019\n",
      "episode: 792/999, score: 199, e: 0.019\n",
      "episode: 793/999, score: 199, e: 0.019\n",
      "episode: 794/999, score: 199, e: 0.019\n",
      "episode: 795/999, score: 199, e: 0.019\n",
      "episode: 796/999, score: 198, e: 0.019\n",
      "episode: 797/999, score: 166, e: 0.019\n",
      "episode: 798/999, score: 199, e: 0.018\n",
      "episode: 799/999, score: 199, e: 0.018\n",
      "episode: 800/999, score: 199, e: 0.018\n",
      "episode: 801/999, score: 199, e: 0.018\n",
      "episode: 802/999, score: 140, e: 0.018\n",
      "episode: 803/999, score: 146, e: 0.018\n",
      "episode: 804/999, score: 199, e: 0.018\n",
      "episode: 805/999, score: 199, e: 0.018\n",
      "episode: 806/999, score: 199, e: 0.018\n",
      "episode: 807/999, score: 199, e: 0.018\n",
      "episode: 808/999, score: 199, e: 0.018\n",
      "episode: 809/999, score: 179, e: 0.017\n",
      "episode: 810/999, score: 176, e: 0.017\n",
      "episode: 811/999, score: 199, e: 0.017\n",
      "episode: 812/999, score: 199, e: 0.017\n",
      "episode: 813/999, score: 199, e: 0.017\n",
      "episode: 814/999, score: 199, e: 0.017\n",
      "episode: 815/999, score: 198, e: 0.017\n",
      "episode: 816/999, score: 199, e: 0.017\n",
      "episode: 817/999, score: 199, e: 0.017\n",
      "episode: 818/999, score: 199, e: 0.017\n",
      "episode: 819/999, score: 199, e: 0.017\n",
      "episode: 820/999, score: 199, e: 0.016\n",
      "episode: 821/999, score: 199, e: 0.016\n",
      "episode: 822/999, score: 199, e: 0.016\n",
      "episode: 823/999, score: 199, e: 0.016\n",
      "episode: 824/999, score: 199, e: 0.016\n",
      "episode: 825/999, score: 199, e: 0.016\n",
      "episode: 826/999, score: 199, e: 0.016\n",
      "episode: 827/999, score: 199, e: 0.016\n",
      "episode: 828/999, score: 199, e: 0.016\n",
      "episode: 829/999, score: 199, e: 0.016\n",
      "episode: 830/999, score: 199, e: 0.016\n",
      "episode: 831/999, score: 199, e: 0.016\n",
      "episode: 832/999, score: 199, e: 0.016\n",
      "episode: 833/999, score: 93, e: 0.015\n",
      "episode: 834/999, score: 102, e: 0.015\n",
      "episode: 835/999, score: 199, e: 0.015\n",
      "episode: 836/999, score: 199, e: 0.015\n",
      "episode: 837/999, score: 199, e: 0.015\n",
      "episode: 838/999, score: 199, e: 0.015\n",
      "episode: 839/999, score: 199, e: 0.015\n",
      "episode: 840/999, score: 199, e: 0.015\n",
      "episode: 841/999, score: 199, e: 0.015\n",
      "episode: 842/999, score: 165, e: 0.015\n",
      "episode: 843/999, score: 197, e: 0.015\n",
      "episode: 844/999, score: 140, e: 0.015\n",
      "episode: 845/999, score: 180, e: 0.015\n",
      "episode: 846/999, score: 199, e: 0.014\n",
      "episode: 847/999, score: 199, e: 0.014\n",
      "episode: 848/999, score: 199, e: 0.014\n",
      "episode: 849/999, score: 199, e: 0.014\n",
      "episode: 850/999, score: 199, e: 0.014\n",
      "episode: 851/999, score: 199, e: 0.014\n",
      "episode: 852/999, score: 199, e: 0.014\n",
      "episode: 853/999, score: 199, e: 0.014\n",
      "episode: 854/999, score: 199, e: 0.014\n",
      "episode: 855/999, score: 199, e: 0.014\n",
      "episode: 856/999, score: 199, e: 0.014\n",
      "episode: 857/999, score: 199, e: 0.014\n",
      "episode: 858/999, score: 199, e: 0.014\n",
      "episode: 859/999, score: 199, e: 0.014\n",
      "episode: 860/999, score: 199, e: 0.013\n",
      "episode: 861/999, score: 199, e: 0.013\n",
      "episode: 862/999, score: 199, e: 0.013\n",
      "episode: 863/999, score: 146, e: 0.013\n",
      "episode: 864/999, score: 199, e: 0.013\n",
      "episode: 865/999, score: 199, e: 0.013\n",
      "episode: 866/999, score: 199, e: 0.013\n",
      "episode: 867/999, score: 198, e: 0.013\n",
      "episode: 868/999, score: 199, e: 0.013\n",
      "episode: 869/999, score: 199, e: 0.013\n",
      "episode: 870/999, score: 199, e: 0.013\n",
      "episode: 871/999, score: 199, e: 0.013\n",
      "episode: 872/999, score: 186, e: 0.013\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode: 873/999, score: 199, e: 0.013\n",
      "episode: 874/999, score: 199, e: 0.013\n",
      "episode: 875/999, score: 199, e: 0.013\n",
      "episode: 876/999, score: 199, e: 0.012\n",
      "episode: 877/999, score: 199, e: 0.012\n",
      "episode: 878/999, score: 199, e: 0.012\n",
      "episode: 879/999, score: 199, e: 0.012\n",
      "episode: 880/999, score: 119, e: 0.012\n",
      "episode: 881/999, score: 199, e: 0.012\n",
      "episode: 882/999, score: 199, e: 0.012\n",
      "episode: 883/999, score: 199, e: 0.012\n",
      "episode: 884/999, score: 199, e: 0.012\n",
      "episode: 885/999, score: 199, e: 0.012\n",
      "episode: 886/999, score: 199, e: 0.012\n",
      "episode: 887/999, score: 199, e: 0.012\n",
      "episode: 888/999, score: 199, e: 0.012\n",
      "episode: 889/999, score: 199, e: 0.012\n",
      "episode: 890/999, score: 199, e: 0.012\n",
      "episode: 891/999, score: 199, e: 0.012\n",
      "episode: 892/999, score: 199, e: 0.011\n",
      "episode: 893/999, score: 199, e: 0.011\n",
      "episode: 894/999, score: 199, e: 0.011\n",
      "episode: 895/999, score: 199, e: 0.011\n",
      "episode: 896/999, score: 199, e: 0.011\n",
      "episode: 897/999, score: 158, e: 0.011\n",
      "episode: 898/999, score: 199, e: 0.011\n",
      "episode: 899/999, score: 199, e: 0.011\n",
      "episode: 900/999, score: 199, e: 0.011\n",
      "episode: 901/999, score: 199, e: 0.011\n",
      "episode: 902/999, score: 199, e: 0.011\n",
      "episode: 903/999, score: 157, e: 0.011\n",
      "episode: 904/999, score: 199, e: 0.011\n",
      "episode: 905/999, score: 199, e: 0.011\n",
      "episode: 906/999, score: 199, e: 0.011\n",
      "episode: 907/999, score: 199, e: 0.011\n",
      "episode: 908/999, score: 199, e: 0.011\n",
      "episode: 909/999, score: 199, e: 0.011\n",
      "episode: 910/999, score: 199, e: 0.01\n",
      "episode: 911/999, score: 199, e: 0.01\n",
      "episode: 912/999, score: 199, e: 0.01\n",
      "episode: 913/999, score: 199, e: 0.01\n",
      "episode: 914/999, score: 199, e: 0.01\n",
      "episode: 915/999, score: 199, e: 0.01\n",
      "episode: 916/999, score: 199, e: 0.01\n",
      "episode: 917/999, score: 199, e: 0.01\n",
      "episode: 918/999, score: 199, e: 0.01\n",
      "episode: 919/999, score: 199, e: 0.01\n",
      "episode: 920/999, score: 199, e: 0.01\n",
      "episode: 921/999, score: 90, e: 0.01\n",
      "episode: 922/999, score: 140, e: 0.01\n",
      "episode: 923/999, score: 189, e: 0.01\n",
      "episode: 924/999, score: 187, e: 0.01\n",
      "episode: 925/999, score: 199, e: 0.01\n",
      "episode: 926/999, score: 199, e: 0.01\n",
      "episode: 927/999, score: 199, e: 0.01\n",
      "episode: 928/999, score: 199, e: 0.01\n",
      "episode: 929/999, score: 199, e: 0.01\n",
      "episode: 930/999, score: 199, e: 0.01\n",
      "episode: 931/999, score: 199, e: 0.01\n",
      "episode: 932/999, score: 199, e: 0.01\n",
      "episode: 933/999, score: 199, e: 0.01\n",
      "episode: 934/999, score: 199, e: 0.01\n",
      "episode: 935/999, score: 199, e: 0.01\n",
      "episode: 936/999, score: 199, e: 0.01\n",
      "episode: 937/999, score: 199, e: 0.01\n",
      "episode: 938/999, score: 199, e: 0.01\n",
      "episode: 939/999, score: 199, e: 0.01\n",
      "episode: 940/999, score: 199, e: 0.01\n",
      "episode: 941/999, score: 175, e: 0.01\n",
      "episode: 942/999, score: 199, e: 0.01\n",
      "episode: 943/999, score: 199, e: 0.01\n",
      "episode: 944/999, score: 199, e: 0.01\n",
      "episode: 945/999, score: 199, e: 0.01\n",
      "episode: 946/999, score: 199, e: 0.01\n",
      "episode: 947/999, score: 199, e: 0.01\n",
      "episode: 948/999, score: 199, e: 0.01\n",
      "episode: 949/999, score: 199, e: 0.01\n",
      "episode: 950/999, score: 199, e: 0.01\n",
      "episode: 951/999, score: 199, e: 0.01\n",
      "episode: 952/999, score: 199, e: 0.01\n",
      "episode: 953/999, score: 109, e: 0.01\n",
      "episode: 954/999, score: 150, e: 0.01\n",
      "episode: 955/999, score: 190, e: 0.01\n",
      "episode: 956/999, score: 125, e: 0.01\n",
      "episode: 957/999, score: 163, e: 0.01\n",
      "episode: 958/999, score: 153, e: 0.01\n",
      "episode: 959/999, score: 199, e: 0.01\n",
      "episode: 960/999, score: 199, e: 0.01\n",
      "episode: 961/999, score: 199, e: 0.01\n",
      "episode: 962/999, score: 199, e: 0.01\n",
      "episode: 963/999, score: 199, e: 0.01\n",
      "episode: 964/999, score: 199, e: 0.01\n",
      "episode: 965/999, score: 199, e: 0.01\n",
      "episode: 966/999, score: 199, e: 0.01\n",
      "episode: 967/999, score: 199, e: 0.01\n",
      "episode: 968/999, score: 199, e: 0.01\n",
      "episode: 969/999, score: 199, e: 0.01\n",
      "episode: 970/999, score: 199, e: 0.01\n",
      "episode: 971/999, score: 199, e: 0.01\n",
      "episode: 972/999, score: 174, e: 0.01\n",
      "episode: 973/999, score: 199, e: 0.01\n",
      "episode: 974/999, score: 199, e: 0.01\n",
      "episode: 975/999, score: 199, e: 0.01\n",
      "episode: 976/999, score: 199, e: 0.01\n",
      "episode: 977/999, score: 199, e: 0.01\n",
      "episode: 978/999, score: 167, e: 0.01\n",
      "episode: 979/999, score: 199, e: 0.01\n",
      "episode: 980/999, score: 153, e: 0.01\n",
      "episode: 981/999, score: 199, e: 0.01\n",
      "episode: 982/999, score: 188, e: 0.01\n",
      "episode: 983/999, score: 199, e: 0.01\n",
      "episode: 984/999, score: 199, e: 0.01\n",
      "episode: 985/999, score: 14, e: 0.01\n",
      "episode: 986/999, score: 149, e: 0.01\n",
      "episode: 987/999, score: 199, e: 0.01\n",
      "episode: 988/999, score: 199, e: 0.01\n",
      "episode: 989/999, score: 199, e: 0.01\n",
      "episode: 990/999, score: 199, e: 0.01\n",
      "episode: 991/999, score: 199, e: 0.01\n",
      "episode: 992/999, score: 199, e: 0.01\n",
      "episode: 993/999, score: 199, e: 0.01\n",
      "episode: 994/999, score: 199, e: 0.01\n",
      "episode: 995/999, score: 199, e: 0.01\n",
      "episode: 996/999, score: 199, e: 0.01\n",
      "episode: 997/999, score: 199, e: 0.01\n",
      "episode: 998/999, score: 199, e: 0.01\n",
      "episode: 999/999, score: 199, e: 0.01\n"
     ]
    }
   ],
   "source": [
    "# Training loop: play n_episodes episodes, store every transition in the agent's memory,\n",
    "# train once per episode, and checkpoint the weights periodically.\n",
    "# Relies on env, agent, n_episodes, state_size, batch_size and output_dir from earlier cells.\n",
    "for e in range(n_episodes): # iterate over episodes of gameplay\n",
    "\n",
    "    state = env.reset() # reset state at start of each new episode of the game\n",
    "    state = np.reshape(state, [1, state_size]) # reshape to a single row of state_size values\n",
    "\n",
    "    done = False\n",
    "    time = 0 # zero-based frame counter; goal is to keep pole upright as long as possible\n",
    "    while not done:\n",
    "        # env.render() # uncomment to watch the agent play\n",
    "        action = agent.act(state) # action is either 0 or 1 (move cart left or right); decide on one or other here\n",
    "        next_state, reward, done, _ = env.step(action) # agent interacts with env, gets feedback; 4 state data points, e.g., pole angle, cart position\n",
    "        # The step that ends the episode gets -10 instead of the environment's reward.\n",
    "        # NOTE(review): episodes that stop at the score cap seen in the logged output (199) also end with\n",
    "        # done=True, so those (presumably time-limited) episodes are penalised too -- confirm this is intended.\n",
    "        reward = reward if not done else -10 # reward +1 for each additional frame with pole upright\n",
    "        next_state = np.reshape(next_state, [1, state_size])\n",
    "        agent.remember(state, action, reward, next_state, done) # remember the previous timestep's state, actions, reward, etc.\n",
    "        state = next_state # set \"current state\" for upcoming iteration to the current next state\n",
    "        if done: # if episode ends:\n",
    "            # time is incremented only after this print, so the reported score is the\n",
    "            # zero-based index of the final frame (an episode of N frames prints N-1).\n",
    "            print(\"episode: {}/{}, score: {}, e: {:.2}\" # print the episode's score and agent's epsilon\n",
    "                  .format(e, n_episodes-1, time, agent.epsilon))\n",
    "        time += 1\n",
    "\n",
    "    if len(agent.memory) > batch_size: # only train once memory holds more than one batch of experiences\n",
    "        agent.train(batch_size) # train the agent by replaying the experiences of the episode\n",
    "\n",
    "    # Checkpoint every 50 episodes and also after the last episode, so the final weights are\n",
    "    # not lost when n_episodes - 1 is not a multiple of 50. os.path.join builds the same path\n",
    "    # whether or not output_dir ends with a path separator.\n",
    "    if e % 50 == 0 or e == n_episodes - 1:\n",
    "        agent.save(os.path.join(output_dir, \"weights_{:04d}.hdf5\".format(e)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Saved agents can be loaded with agent.load(\"./path/filename.hdf5\"); the training loop\n",
    "# above names its checkpoint files weights_NNNN.hdf5 (NNNN = zero-padded episode number)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
